
*-------------------------------------------------------------------------------
*
* Create an output file to plot the relationship between AKM
* establishment fixed effects and establishment utility values estimated as in 
* Sorkin (QJE 2018)
* We estimate rents and cd using this relationship.
* Combined output file is: "$results/firm_effects_ver$ver"
*-------------------------------------------------------------------------------


*-------------------------------------------------------------------------------
*Step 1: Input the value data
*-------------------------------------------------------------------------------

import delimited using "$matlab\data_new\exp_V_ver$ver.csv", clear
gen double betnr =v1
gen double exp_v =v2
drop v1 v2

gen double V = ln(exp_v)
drop exp_v
scalar V_n = V[1] //value of non-employment
gen V_adj = V-V_n
save "$temp/values_new_ver$ver.dta", replace

use "$temp/workers_currentid_year_ver$ver.dta", clear
gen workers_m = workers
collapse (sum) workers (mean) workers_m, by(currentid)
rename  currentid betnr
compress
save "$temp/workers_betnr.dta", replace
*use "$temp/workers_betnr.dta", clear

use "$temp/values_new_ver$ver", clear
merge 1:1 betnr using "$temp/workers_betnr.dta"
drop if _merge == 2
drop _merge

erase "$temp/workers_betnr.dta" // new

*Compute the V-psi relationship
merge 1:1 betnr using $temp/AKM_wages_ver$ver.dta, keepusing(firm_fe)
keep if _merge==3 //we lose cases here because not all person spells are in the connected set
drop _merge


*-------------------------------------------------------------------------------
*Determine the psi-V slope in the microdata
*-------------------------------------------------------------------------------

keep betnr V V_adj firm_fe

*Keep relevant obs for the graph and regression
drop if missing(firm_fe) //drop if no estimated FE
rename V_adj value

*Studentize the regressors
egen std_fe = std(firm_fe)
egen std_V  = std(value)


*****************************


cap drop mean sd p 
cap drop value_p bin_value
gen mean = .
gen sd = . 
gen p = _n
replace p = . if p > 100
gen value_p = .
gen bin_value = .
qui {
forv p = 0(1)98 {
	local pp = `p' + 1
	dis as text "`p' to `pp'"
	centile std_V, centile(`p' `pp')
	replace value_p = `pp' if std_V >= r(c_1) & std_V < r(c_2)
	su std_fe if std_V >= r(c_1) & std_V < r(c_2), de
	
	replace mean = r(mean) in `pp'
	replace sd   = r(sd)/sqrt(r(N)) in `pp'
	
	centile std_V, centile(`p' `pp')
	su std_V if std_V >= r(c_1) & std_V < r(c_2)
	replace bin_value = r(mean) in `pp'
 }
} 
dis as text "95 to 100"
local p  = 99
local pp = 100 
centile std_V, centile(`p' `pp')
replace value_p = `pp' if std_V >= r(c_1) & std_V <= r(c_2)
su std_fe if std_V >= r(c_1) & std_V <= r(c_2), de
replace mean = r(mean) in `pp'
replace sd = r(sd)/sqrt(r(N)) in `pp' 
centile std_V, centile(`p' `pp')
su std_V if std_V >= r(c_1) & std_V <= r(c_2)
replace bin_value = r(mean) in `pp'
	
graph twoway (scatter mean bin_value, mfcolor(none) ) (lfit mean bin_value),ytitle(Employer effects in earnings) xtitle(Value)     
graph export "$log/V_fe_p1_1_100_ver$ver.pdf", replace

drop if value_p ==   1
drop if value_p == 100 | missing(value_p)

graph twoway (scatter mean bin_value if inrange(p, 2,99), mfcolor(none) ) (lfit mean bin_value if inrange(p, 2,99)),ytitle(Employer effects in earnings) xtitle(Value)     
graph export "$log/V_fe_p1_2_99_ver$ver.pdf", replace

drop mean sd p bin_value



*****************************


*Run psi-V regression

*Save slope for the sector based aggregate output
reg std_fe std_V, r
matrix b = e(b)
global beta = b[1,1]

twoway lfit std_fe std_V || lpoly std_fe std_V || hist std_V
su std_V, de

*Save coefficients for main analyses
reg firm_fe value
matrix b = e(b)
predict cd, residuals
predict rents, xb

preserve
	clear
	svmat b
	rename b1 std_V
	rename b2 cons
	gen method = "All OLS (trimmed)"
	order method std_V cons
	save $temp/psiV_slopes_ver$ver, replace
restore	

corr std_fe std_V
su std_fe, de 
su std_V, de

preserve
label var value "value, centered"
label var cd "compensating differential"
label var rents "rents"
save $temp/firmfe_value_new_ver$ver, replace
restore

*-------------------------------------------------------------------------------
* Step 2: Determine the psi-V slope in binned cells & aggregate to cell level
*-------------------------------------------------------------------------------

*Bin by percentiles 
centile std_V, centile(1(1)99)

matrix centiles = J(99,1,.)
forvalues i = 1/99{
	matrix centiles[`i',1] = r(c_`i')
}

*generate bins
gen double V_bin=.
gen double V_binval = .
gen double V_binsize = .

replace V_bin=1 if std_V<=centiles[1,1]
qui sum std_V if V_bin==1
replace V_binval = r(mean) if V_bin==1
replace V_binsize = r(N) if V_bin==1

forvalues i =2/99{
	local j = `i'-1
	replace V_bin=`i' if std_V<=centiles[`i',1] & std_V>centiles[`j',1]
	qui sum std_V if V_bin==`i'
	replace V_binval = r(mean) if V_bin==`i'
	replace V_binsize = r(N) if V_bin==`i'
}

replace V_bin=100 if std_V>centiles[99,1]
qui sum std_V if V_bin==100
replace V_binval = r(mean) if V_bin==100
replace V_binsize = r(N) if V_bin==100

collapse (mean) av_std_fe= std_fe ///
(sd) sd_std_fe= std_fe ///
(mean) V_binval = V_binval ///
(mean) V_binsize = V_binsize, by(V_bin)


gen lb_av_std_fe = av_std_fe-sd_std_fe
gen ub_av_std_fe = av_std_fe+sd_std_fe

order V_bin V_binval V_binsize av_std_fe ///
sd_std_fe lb_av_std_fe ub_av_std_fe

*Output collapsed data
export delimited using $results/V_fe_p5_2_99_ver$ver.csv, replace

twoway (scatter av_std_fe V_binval, mcolor(blue) msymbol(circle_hollow) mlwidth(medthick)) /// 
(line ub_av_std_fe V_binval, lcolor(red) lpattern(vshortdash)) /// 
(line lb_av_std_fe V_binval, lcolor(red) lpattern(vshortdash)) /// 
(lfit av_std_fe V_binval, lcolor(black) lwidth(medthick)), /// 
ytitle(Employer fixed effect ({&psi})) xtitle(Employer value (V)) legend(off) graphr(color(gs16))
graph export "$graph/V_fe_p1_2_99_ver$ver.pdf", replace

save "$graph/V_fe_p1_2_99_ver$ver.dta", replace

*-------------------------------------------------------------------------------
* Step 3: Aggregate psi-V to the sector (2 digit industry level)
*-------------------------------------------------------------------------------

use "$temp/wz.dta", clear

merge 1:1 betnr using  "$temp/firmfe_value_new_ver$ver"
keep if _merge==3
drop _merge

tab sector, mis
drop if sector == "Missing sector"
drop if strpos(sector, "T:")>0
drop if strpos(sector, "U:")>0
tab sector, mis


save "$results/firm_effects_ver$ver", replace

*****************************************
*****************************************

*Average values and FE's to the sector level 
collapse (mean) av_v=std_V av_fe=std_fe, by(sector) 

*Line of best fit 
gen avrent = $beta * av_v

*Output collapsed data
outsheet using "$results/firmfe_value_sector_ver$ver.csv", comma replace

twoway (line avrent av_v, lpattern(shortdash) lcolor(black)) (scatter av_fe av_v, mlabel(sector)), ///
ytitle(Average employer fixed effect) xtitle(Average value) legend(off) graphr(color(gs16)) ysc(r(-1.5 1.5)) xsc(r(-1.5 1.5))
graph export "$graph/sector_map_ver$ver.pdf", replace

twoway (line avrent av_v if sector != "78.*: Employment Activities (normaly part of N)", lpattern(shortdash) lcolor(black)) (scatter av_fe av_v if sector != "78.*: Employment Activities (normaly part of N)", mlabel(sector)), ///
ytitle(Average employer fixed effect) xtitle(Average employer value) legend(off) graphr(color(gs16)) ysc(r(-1.5 1.5)) xsc(r(-1.5 1.5))
graph export "$graph/sector_map_wo_78_ver$ver.pdf", replace


*Assign 3 digit industry to sector
replace sector = "A" if strpos(sector, "A:")>0
replace sector = "B" if strpos(sector, "B:")>0
replace sector = "C" if strpos(sector, "C:")>0
replace sector = "D" if strpos(sector, "D:")>0
replace sector = "E" if strpos(sector, "E:")>0
replace sector = "F" if strpos(sector, "F:")>0
replace sector = "G" if strpos(sector, "G:")>0
replace sector = "H" if strpos(sector, "H:")>0
replace sector = "I" if strpos(sector, "I:")>0
replace sector = "J" if strpos(sector, "J:")>0
replace sector = "K" if strpos(sector, "K:")>0
replace sector = "L" if strpos(sector, "L:")>0
replace sector = "M" if strpos(sector, "M:")>0
replace sector = "78.*"   if strpos(sector, "78.*: Em")>0
replace sector = "N-78.*" if strpos(sector, "N-78.*")>0
replace sector = "O" if strpos(sector, "O:")>0
replace sector = "P" if strpos(sector, "P:")>0
replace sector = "Q" if strpos(sector, "Q:")>0
replace sector = "R" if strpos(sector, "R:")>0
replace sector = "S" if strpos(sector, "S:")>0
replace sector = "T" if strpos(sector, "T:")>0
replace sector = "U" if strpos(sector, "U:")>0
replace sector = "Missing sector" if sector == "Missing sector"


twoway ///
(line    avrent av_v if sector != "78.*", lpattern(shortdash) lcolor(black)) ///
(scatter  av_fe av_v if sector != "78.*", mlabel(sector) msymbol(circle_hollow)), ///
ytitle(Average employer fixed effect) xtitle(Average employer value) legend(off) graphr(color(gs16)) ysc(r(-1.5 1.5)) xsc(r(-1.5 1.5)) xlabel(-1.5(.5)1.5) ylabel(-1.5(.5)1.5)
graph export "$graph/sector_map_wo_78_letters_ver$ver.pdf", replace

twoway ///
(line    avrent av_v , lpattern(shortdash) lcolor(black)) ///
(scatter  av_fe av_v , mlabel(sector) msymbol(circle_hollow)), ///
ytitle(Average employer fixed effect) xtitle(Average employer value) legend(off) graphr(color(gs16)) ysc(r(-1.5 1.5)) xsc(r(-1.5 1.5)) xlabel(-1.5(.5)1.5) ylabel(-1.5(.5)1.5)
graph export "$graph/sector_map_letters_ver$ver.pdf", replace

log close
